import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Exploring data
# Render every column when a frame is displayed instead of eliding the middle.
pd.set_option('display.max_columns', None)
# Load the raw Zomato export; the path is specific to the author's machine.
zomatodata = pd.read_csv(
    r"D:\Projects\EDA Zomato\zomato_restaurants_in_India.csv"
)
zomatodata
| res_id | name | establishment | url | address | city | city_id | locality | latitude | longitude | zipcode | country_id | locality_verbose | cuisines | timings | average_cost_for_two | price_range | currency | highlights | aggregate_rating | rating_text | votes | photo_count | opentable_support | delivery | takeaway | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3400299 | Bikanervala | ['Quick Bites'] | https://www.zomato.com/agra/bikanervala-khanda... | Kalyani Point, Near Tulsi Cinema, Bypass Road,... | Agra | 34 | Khandari | 27.211450 | 78.002381 | NaN | 1 | Khandari, Agra | North Indian, South Indian, Mithai, Street Foo... | 8:30am – 10:30pm (Mon-Sun) | 700 | 2 | Rs. | ['Lunch', 'Takeaway Available', 'Credit Card',... | 4.4 | Very Good | 814 | 154 | 0.0 | -1 | -1 |
| 1 | 3400005 | Mama Chicken Mama Franky House | ['Quick Bites'] | https://www.zomato.com/agra/mama-chicken-mama-... | Main Market, Sadar Bazaar, Agra Cantt, Agra | Agra | 34 | Agra Cantt | 27.160569 | 78.011583 | 282001.0 | 1 | Agra Cantt, Agra | North Indian, Mughlai, Rolls, Chinese, Fast Fo... | 12:30PM to 12Midnight (Mon, Wed, Thu, Fri, Sat... | 600 | 2 | Rs. | ['Delivery', 'No Alcohol Available', 'Dinner',... | 4.4 | Very Good | 1203 | 161 | 0.0 | -1 | -1 |
| 2 | 3401013 | Bhagat Halwai | ['Quick Bites'] | https://www.zomato.com/agra/bhagat-halwai-2-sh... | 62/1, Near Easy Day, West Shivaji Nagar, Goalp... | Agra | 34 | Shahganj | 27.182938 | 77.979684 | 282010.0 | 1 | Shahganj, Agra | Fast Food, Mithai | 9:30 AM to 11 PM | 300 | 1 | Rs. | ['No Alcohol Available', 'Dinner', 'Takeaway A... | 4.2 | Very Good | 801 | 107 | 0.0 | 1 | -1 |
| 3 | 3400290 | Bhagat Halwai | ['Quick Bites'] | https://www.zomato.com/agra/bhagat-halwai-civi... | Near Anjana Cinema, Nehru Nagar, Civil Lines, ... | Agra | 34 | Civil Lines | 27.205668 | 78.004799 | 282002.0 | 1 | Civil Lines, Agra | Desserts, Bakery, Fast Food, South Indian | 8am – 11pm (Mon-Sun) | 300 | 1 | Rs. | ['Takeaway Available', 'Credit Card', 'Lunch',... | 4.3 | Very Good | 693 | 157 | 0.0 | 1 | -1 |
| 4 | 3401744 | The Salt Cafe Kitchen & Bar | ['Casual Dining'] | https://www.zomato.com/agra/the-salt-cafe-kitc... | 1C,3rd Floor, Fatehabad Road, Tajganj, Agra | Agra | 34 | Tajganj | 27.157709 | 78.052421 | NaN | 1 | Tajganj, Agra | North Indian, Continental, Italian | 11:30 AM to 11:30 PM | 1000 | 3 | Rs. | ['Lunch', 'Serves Alcohol', 'Cash', 'Credit Ca... | 4.9 | Excellent | 470 | 291 | 0.0 | 1 | -1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 211939 | 3202251 | Kali Mirch Cafe And Restaurant | ['Casual Dining'] | https://www.zomato.com/vadodara/kali-mirch-caf... | Manu Smriti Complex, Near Navrachna School, GI... | Vadodara | 32 | Fatehgunj | 22.336931 | 73.192356 | 390024.0 | 1 | Fatehgunj, Vadodara | North Indian | 11am – 3pm, 7pm – 11:30pm (Mon-Sun) | 600 | 2 | Rs. | ['Dinner', 'Cash', 'Lunch', 'Delivery', 'Indoo... | 4.1 | Very Good | 243 | 40 | 0.0 | -1 | -1 |
| 211940 | 3200996 | Raju Omlet | ['Quick Bites'] | https://www.zomato.com/vadodara/raju-omlet-kar... | Mahalaxmi Apartment, Opposite B O B, Karoli Ba... | Vadodara | 32 | Karelibaug | 22.322455 | 73.197203 | NaN | 1 | Karelibaug, Vadodara | Fast Food | 4pm – 11pm (Mon, Tue, Wed, Fri, Sat, Sun), 4pm... | 300 | 1 | Rs. | ['Dinner', 'Cash', 'Takeaway Available', 'Debi... | 4.1 | Very Good | 187 | 40 | 0.0 | 1 | -1 |
| 211941 | 18984164 | The Grand Thakar | ['Casual Dining'] | https://www.zomato.com/vadodara/the-grand-thak... | 3rd Floor, Shreem Shalini Mall, Opposite Conqu... | Vadodara | 32 | Alkapuri | 22.310563 | 73.171163 | NaN | 1 | Alkapuri, Vadodara | Gujarati, North Indian, Chinese | 11:30 AM to 3:30 PM, 7:30 PM to 11 PM | 700 | 2 | Rs. | ['Dinner', 'Cash', 'Debit Card', 'Lunch', 'Tak... | 4.0 | Very Good | 111 | 38 | 0.0 | -1 | -1 |
| 211942 | 3201138 | Subway | ['Quick Bites'] | https://www.zomato.com/vadodara/subway-1-akota... | G-2, Vedant Platina, Near Cosmos, Akota, Vadodara | Vadodara | 32 | Akota | 22.270027 | 73.143068 | NaN | 1 | Akota, Vadodara | Fast Food, Sandwich, Salad | 8 AM to 1 AM | 500 | 2 | Rs. | ['Dinner', 'Delivery', 'Credit Card', 'Lunch',... | 3.7 | Good | 128 | 34 | 0.0 | 1 | -1 |
| 211943 | 18879846 | Freshco's - The Health Cafe | ['Café'] | https://www.zomato.com/vadodara/freshcos-the-h... | Shop 7, Ground Floor, Opposite Natubhai Circle... | Vadodara | 32 | Vadiwadi | 22.309935 | 73.158768 | 390007.0 | 1 | Vadiwadi, Vadodara | Cafe, Healthy Food, Coffee | 7am – 11pm (Mon-Sun) | 600 | 2 | Rs. | ['Dinner', 'Cash', 'Takeaway Available', 'Debi... | 4.0 | Very Good | 93 | 53 | 0.0 | 1 | -1 |
211944 rows × 26 columns
# Dimensions of the raw frame as (rows, columns).
zomatodata.shape
(211944, 26)
The dataset has 211944 rows and 26 columns.
# Per-column dtype and non-null count for the raw data.
zomatodata.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 211944 entries, 0 to 211943 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 res_id 211944 non-null int64 1 name 211944 non-null object 2 establishment 211944 non-null object 3 url 211944 non-null object 4 address 211810 non-null object 5 city 211944 non-null object 6 city_id 211944 non-null int64 7 locality 211944 non-null object 8 latitude 211944 non-null float64 9 longitude 211944 non-null float64 10 zipcode 48757 non-null object 11 country_id 211944 non-null int64 12 locality_verbose 211944 non-null object 13 cuisines 210553 non-null object 14 timings 208070 non-null object 15 average_cost_for_two 211944 non-null int64 16 price_range 211944 non-null int64 17 currency 211944 non-null object 18 highlights 211944 non-null object 19 aggregate_rating 211944 non-null float64 20 rating_text 211944 non-null object 21 votes 211944 non-null int64 22 photo_count 211944 non-null int64 23 opentable_support 211896 non-null float64 24 delivery 211944 non-null int64 25 takeaway 211944 non-null int64 dtypes: float64(4), int64(9), object(13) memory usage: 42.0+ MB
# Summary statistics for the numeric columns of the raw data.
zomatodata.describe()
| res_id | city_id | latitude | longitude | country_id | average_cost_for_two | price_range | aggregate_rating | votes | photo_count | opentable_support | delivery | takeaway | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2.119440e+05 | 211944.000000 | 211944.000000 | 211944.000000 | 211944.0 | 211944.000000 | 211944.000000 | 211944.000000 | 211944.000000 | 211944.000000 | 211896.0 | 211944.000000 | 211944.0 |
| mean | 1.349411e+07 | 4746.785434 | 21.499758 | 77.615276 | 1.0 | 595.812229 | 1.882535 | 3.395937 | 378.001864 | 256.971224 | 0.0 | -0.255907 | -1.0 |
| std | 7.883722e+06 | 5568.766386 | 22.781331 | 7.500104 | 0.0 | 606.239363 | 0.892989 | 1.283642 | 925.333370 | 867.668940 | 0.0 | 0.964172 | 0.0 |
| min | 5.000000e+01 | 1.000000 | 0.000000 | 0.000000 | 1.0 | 0.000000 | 1.000000 | 0.000000 | -18.000000 | 0.000000 | 0.0 | -1.000000 | -1.0 |
| 25% | 3.301027e+06 | 11.000000 | 15.496071 | 74.877961 | 1.0 | 250.000000 | 1.000000 | 3.300000 | 16.000000 | 3.000000 | 0.0 | -1.000000 | -1.0 |
| 50% | 1.869573e+07 | 34.000000 | 22.514494 | 77.425971 | 1.0 | 400.000000 | 2.000000 | 3.800000 | 100.000000 | 18.000000 | 0.0 | -1.000000 | -1.0 |
| 75% | 1.881297e+07 | 11306.000000 | 26.841667 | 80.219323 | 1.0 | 700.000000 | 2.000000 | 4.100000 | 362.000000 | 128.000000 | 0.0 | 1.000000 | -1.0 |
| max | 1.915979e+07 | 11354.000000 | 10000.000000 | 91.832769 | 1.0 | 30000.000000 | 4.000000 | 4.900000 | 42539.000000 | 17702.000000 | 0.0 | 1.000000 | -1.0 |
As 'res_id' is a unique identifier for the restaurants, we will use it to identify and remove duplicates.
# Count rows whose res_id repeats an earlier row's res_id.
zomatodata.duplicated(subset=["res_id"]).sum()
156376
Total rows = 211944\ Duplicate rows = 156376
# Keep only the first occurrence of every res_id, modifying the frame in place.
zomatodata.drop_duplicates(subset=["res_id"], keep="first", inplace=True)
# Dimensions after de-duplication.
zomatodata.shape
(55568, 26)
Total rows after removing duplicates:\ Total rows = 55568
# Work on a copy so the de-duplicated source frame stays untouched.
zomato = zomatodata.copy()
# Preview the first five rows of the working copy.
zomato.head()
| res_id | name | establishment | url | address | city | city_id | locality | latitude | longitude | zipcode | country_id | locality_verbose | cuisines | timings | average_cost_for_two | price_range | currency | highlights | aggregate_rating | rating_text | votes | photo_count | opentable_support | delivery | takeaway | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3400299 | Bikanervala | ['Quick Bites'] | https://www.zomato.com/agra/bikanervala-khanda... | Kalyani Point, Near Tulsi Cinema, Bypass Road,... | Agra | 34 | Khandari | 27.211450 | 78.002381 | NaN | 1 | Khandari, Agra | North Indian, South Indian, Mithai, Street Foo... | 8:30am – 10:30pm (Mon-Sun) | 700 | 2 | Rs. | ['Lunch', 'Takeaway Available', 'Credit Card',... | 4.4 | Very Good | 814 | 154 | 0.0 | -1 | -1 |
| 1 | 3400005 | Mama Chicken Mama Franky House | ['Quick Bites'] | https://www.zomato.com/agra/mama-chicken-mama-... | Main Market, Sadar Bazaar, Agra Cantt, Agra | Agra | 34 | Agra Cantt | 27.160569 | 78.011583 | 282001.0 | 1 | Agra Cantt, Agra | North Indian, Mughlai, Rolls, Chinese, Fast Fo... | 12:30PM to 12Midnight (Mon, Wed, Thu, Fri, Sat... | 600 | 2 | Rs. | ['Delivery', 'No Alcohol Available', 'Dinner',... | 4.4 | Very Good | 1203 | 161 | 0.0 | -1 | -1 |
| 2 | 3401013 | Bhagat Halwai | ['Quick Bites'] | https://www.zomato.com/agra/bhagat-halwai-2-sh... | 62/1, Near Easy Day, West Shivaji Nagar, Goalp... | Agra | 34 | Shahganj | 27.182938 | 77.979684 | 282010.0 | 1 | Shahganj, Agra | Fast Food, Mithai | 9:30 AM to 11 PM | 300 | 1 | Rs. | ['No Alcohol Available', 'Dinner', 'Takeaway A... | 4.2 | Very Good | 801 | 107 | 0.0 | 1 | -1 |
| 3 | 3400290 | Bhagat Halwai | ['Quick Bites'] | https://www.zomato.com/agra/bhagat-halwai-civi... | Near Anjana Cinema, Nehru Nagar, Civil Lines, ... | Agra | 34 | Civil Lines | 27.205668 | 78.004799 | 282002.0 | 1 | Civil Lines, Agra | Desserts, Bakery, Fast Food, South Indian | 8am – 11pm (Mon-Sun) | 300 | 1 | Rs. | ['Takeaway Available', 'Credit Card', 'Lunch',... | 4.3 | Very Good | 693 | 157 | 0.0 | 1 | -1 |
| 4 | 3401744 | The Salt Cafe Kitchen & Bar | ['Casual Dining'] | https://www.zomato.com/agra/the-salt-cafe-kitc... | 1C,3rd Floor, Fatehabad Road, Tajganj, Agra | Agra | 34 | Tajganj | 27.157709 | 78.052421 | NaN | 1 | Tajganj, Agra | North Indian, Continental, Italian | 11:30 AM to 11:30 PM | 1000 | 3 | Rs. | ['Lunch', 'Serves Alcohol', 'Cash', 'Credit Ca... | 4.9 | Excellent | 470 | 291 | 0.0 | 1 | -1 |
# Dimensions of the working copy as (rows, columns).
zomato.shape
(55568, 26)
# Per-column dtype and non-null count for the working copy.
zomato.info()
<class 'pandas.core.frame.DataFrame'> Index: 55568 entries, 0 to 211942 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 res_id 55568 non-null int64 1 name 55568 non-null object 2 establishment 55568 non-null object 3 url 55568 non-null object 4 address 55550 non-null object 5 city 55568 non-null object 6 city_id 55568 non-null int64 7 locality 55568 non-null object 8 latitude 55568 non-null float64 9 longitude 55568 non-null float64 10 zipcode 10945 non-null object 11 country_id 55568 non-null int64 12 locality_verbose 55568 non-null object 13 cuisines 55098 non-null object 14 timings 54565 non-null object 15 average_cost_for_two 55568 non-null int64 16 price_range 55568 non-null int64 17 currency 55568 non-null object 18 highlights 55568 non-null object 19 aggregate_rating 55568 non-null float64 20 rating_text 55568 non-null object 21 votes 55568 non-null int64 22 photo_count 55568 non-null int64 23 opentable_support 55556 non-null float64 24 delivery 55568 non-null int64 25 takeaway 55568 non-null int64 dtypes: float64(4), int64(9), object(13) memory usage: 11.4+ MB
# Strip surrounding whitespace from the column labels. Index.str.strip()
# returns a new Index, so the result has to be assigned back to take effect.
zomato.columns = zomato.columns.str.strip()
zomato.columns
Index(['res_id', 'name', 'establishment', 'url', 'address', 'city', 'city_id',
'locality', 'latitude', 'longitude', 'zipcode', 'country_id',
'locality_verbose', 'cuisines', 'timings', 'average_cost_for_two',
'price_range', 'currency', 'highlights', 'aggregate_rating',
'rating_text', 'votes', 'photo_count', 'opentable_support', 'delivery',
'takeaway'],
dtype='object')
# Print every column label prefixed with its positional index.
colname = zomato.columns
for position, label in enumerate(colname):
    print(f"{position}.{label}")
0.res_id 1.name 2.establishment 3.url 4.address 5.city 6.city_id 7.locality 8.latitude 9.longitude 10.zipcode 11.country_id 12.locality_verbose 13.cuisines 14.timings 15.average_cost_for_two 16.price_range 17.currency 18.highlights 19.aggregate_rating 20.rating_text 21.votes 22.photo_count 23.opentable_support 24.delivery 25.takeaway
We have 26 columns in the dataframe. We can check each column and decide whether we need it for our analysis.
# Number of missing values in each column.
zomato.isna().sum()
res_id 0 name 0 establishment 0 url 0 address 18 city 0 city_id 0 locality 0 latitude 0 longitude 0 zipcode 44623 country_id 0 locality_verbose 0 cuisines 470 timings 1003 average_cost_for_two 0 price_range 0 currency 0 highlights 0 aggregate_rating 0 rating_text 0 votes 0 photo_count 0 opentable_support 12 delivery 0 takeaway 0 dtype: int64
There are 5 columns that have missing values.
# Data type of each column.
zomato.dtypes
res_id int64 name object establishment object url object address object city object city_id int64 locality object latitude float64 longitude float64 zipcode object country_id int64 locality_verbose object cuisines object timings object average_cost_for_two int64 price_range int64 currency object highlights object aggregate_rating float64 rating_text object votes int64 photo_count int64 opentable_support float64 delivery int64 takeaway int64 dtype: object
# Distinct raw establishment values (before cleanup).
zomato['establishment'].unique()
array(["['Quick Bites']", "['Casual Dining']", "['Bakery']", "['Café']",
"['Dhaba']", "['Bhojanalya']", "['Bar']", "['Sweet Shop']",
"['Fine Dining']", "['Food Truck']", "['Dessert Parlour']",
"['Lounge']", "['Pub']", "['Beverage Shop']", "['Kiosk']",
"['Paan Shop']", "['Confectionery']", '[]', "['Shack']",
"['Club']", "['Food Court']", "['Mess']", "['Butcher Shop']",
"['Microbrewery']", "['Cocktail Bar']", "['Pop up']",
"['Irani Cafe']"], dtype=object)
# Each value is a stringified one-element list such as "['Quick Bites']";
# slicing off the first two and last two characters leaves the bare label.
zomato["establishment"] = zomato["establishment"].str[2:-2]
# "[]" collapses to an empty string, so give those rows an explicit label.
# An exact-match replace keeps the values as plain strings, whereas a per-row
# np.where call returns a 0-d array for every element.
zomato["establishment"] = zomato["establishment"].replace("", "Not Avalible")
zomato['establishment'].unique()
array(['Quick Bites', 'Casual Dining', 'Bakery', 'Café', 'Dhaba',
'Bhojanalya', 'Bar', 'Sweet Shop', 'Fine Dining', 'Food Truck',
'Dessert Parlour', 'Lounge', 'Pub', 'Beverage Shop', 'Kiosk',
'Paan Shop', 'Confectionery', 'Not Avalible', 'Shack', 'Club',
'Food Court', 'Mess', 'Butcher Shop', 'Microbrewery',
'Cocktail Bar', 'Pop up', 'Irani Cafe'], dtype=object)
# Number of restaurants per cleaned establishment label.
zomato['establishment'].value_counts()
establishment Quick Bites 14032 Casual Dining 12270 Café 4123 Bakery 3741 Dessert Parlour 3675 Sweet Shop 2615 Beverage Shop 2440 Not Avalible 1830 Fine Dining 1535 Food Court 1494 Bar 1399 Dhaba 1282 Kiosk 1126 Food Truck 868 Lounge 820 Bhojanalya 632 Mess 361 Pub 357 Paan Shop 320 Confectionery 218 Butcher Shop 154 Club 112 Microbrewery 110 Shack 20 Cocktail Bar 16 Irani Cafe 14 Pop up 4 Name: count, dtype: int64
# Replace missing addresses with a placeholder. The result is assigned back to
# the column: calling fillna(inplace=True) on a column selection is deprecated
# chained assignment and does not update the frame under copy-on-write.
zomato['address'] = zomato['address'].fillna('No address provided')
# Confirm that no missing addresses remain.
zomato['address'].isna().sum()
0
# Distinct city names present in the data.
zomato['city'].unique()
array(['Agra', 'Ahmedabad', 'Gandhinagar', 'Ajmer', 'Alappuzha',
'Allahabad', 'Amravati', 'Amritsar', 'Aurangabad', 'Bangalore',
'Bhopal', 'Bhubaneshwar', 'Chandigarh', 'Mohali', 'Panchkula',
'Zirakpur', 'Nayagaon', 'Chennai', 'Coimbatore', 'Cuttack',
'Darjeeling', 'Dehradun', 'New Delhi', 'Gurgaon', 'Noida',
'Faridabad', 'Ghaziabad', 'Greater Noida', 'Dharamshala',
'Gangtok', 'Goa', 'Gorakhpur', 'Guntur', 'Guwahati', 'Gwalior',
'Haridwar', 'Hyderabad', 'Secunderabad', 'Indore', 'Jabalpur',
'Jaipur', 'Jalandhar', 'Jammu', 'Jamnagar', 'Jamshedpur', 'Jhansi',
'Jodhpur', 'Junagadh', 'Kanpur', 'Kharagpur', 'Kochi', 'Kolhapur',
'Kolkata', 'Howrah', 'Kota', 'Lucknow', 'Ludhiana', 'Madurai',
'Manali', 'Mangalore', 'Manipal', 'Udupi', 'Meerut', 'Mumbai',
'Thane', 'Navi Mumbai', 'Mussoorie', 'Mysore', 'Nagpur',
'Nainital', 'Nasik', 'Nashik', 'Neemrana', 'Ooty', 'Palakkad',
'Patiala', 'Patna', 'Puducherry', 'Pune', 'Pushkar', 'Raipur',
'Rajkot', 'Ranchi', 'Rishikesh', 'Salem', 'Shimla', 'Siliguri',
'Srinagar', 'Surat', 'Thrissur', 'Tirupati', 'Trichy',
'Trivandrum', 'Udaipur', 'Varanasi', 'Vellore', 'Vijayawada',
'Vizag', 'Vadodara'], dtype=object)
# Number of distinct city values (missing values, if any, count as one).
zomato['city'].nunique(dropna=False)
99
# Number of restaurants per city, most frequent first.
zomato['city'].value_counts()
city
Bangalore 2247
Mumbai 2022
Pune 1843
Chennai 1827
New Delhi 1704
...
Udupi 60
Howrah 50
Neemrana 26
Greater Noida 21
Nayagaon 15
Name: count, Length: 99, dtype: int64
# Number of distinct city_id values (missing values, if any, count as one).
zomato['city_id'].nunique(dropna=False)
83
# Number of distinct localities.
zomato['locality'].nunique()
3731
# Drop the zipcode column from the working copy, in place.
zomato.drop(columns=['zipcode'], inplace=True)
# Distinct country ids present in the data.
zomato['country_id'].unique()
array([1], dtype=int64)
# Number of distinct verbose locality strings.
zomato["locality_verbose"].nunique()
3910
The 'cuisines' column contains 470 missing values.
# Number of distinct cuisine strings (each string may list several cuisines).
zomato["cuisines"].nunique()
9382
# Distinct cuisine strings as stored, i.e. comma-separated combinations.
zomato["cuisines"].unique()
array(['North Indian, South Indian, Mithai, Street Food, Desserts',
'North Indian, Mughlai, Rolls, Chinese, Fast Food, Street Food',
'Fast Food, Mithai', ...,
'Street Food, Biryani, Chinese, Fast Food, North Indian, Mughlai',
'North Indian, Chinese, Mexican, Italian, Thai, Continental',
'North Indian, Lucknowi, Chinese'], dtype=object)
# Replace missing cuisine entries with an explicit placeholder label.
zomato['cuisines'] = zomato['cuisines'].fillna('Not Specified')
# Confirm that no missing cuisine values remain.
zomato['cuisines'].isna().sum()
0
Extracting and analyzing the unique cuisines listed in a DataFrame
# Split each comma-separated cuisine string and flatten the pieces into one
# Series with a row per (restaurant, cuisine) mention. This replaces an
# apply() that was used only for its side effect of extending a Python list.
cuisines = zomato['cuisines'].str.split(', ').explode().reset_index(drop=True)
# Number of distinct individual cuisines.
cuisines.nunique()
134
The 'timings' column is missing 1003 values. It is also unstructured and not very useful for the analysis, so we can remove it.
# Number of rows with no timings recorded.
zomato['timings'].isna().sum()
1003
# Distinct free-text timing strings.
zomato['timings'].unique()
array(['8:30am – 10:30pm (Mon-Sun)',
'12:30PM to 12Midnight (Mon, Wed, Thu, Fri, Sat, Sun), 1PM to 12Midnight (Tue)',
'9:30 AM to 11 PM', ..., '8am – 2:30pm, 5pm – 10pm (Mon-Sun)',
'8am – 3pm, 6:30pm – 11pm (Mon-Sun)',
'4pm – 11pm (Mon, Tue, Wed, Fri, Sat, Sun), 4pm – 11:30pm (Thu)'],
dtype=object)
# Number of distinct timing strings.
zomato['timings'].nunique()
7740
# Number of distinct values of the average cost for two.
zomato["average_cost_for_two"].nunique()
145
# Distinct price-range buckets present in the data.
zomato['price_range'].unique()
array([2, 1, 3, 4], dtype=int64)
# Distinct highlight strings; each is a stringified list of feature labels.
zomato['highlights'].unique()
array(["['Lunch', 'Takeaway Available', 'Credit Card', 'Dinner', 'Cash', 'Air Conditioned', 'Indoor Seating', 'Pure Veg']",
"['Delivery', 'No Alcohol Available', 'Dinner', 'Takeaway Available', 'Lunch', 'Cash', 'Indoor Seating']",
"['No Alcohol Available', 'Dinner', 'Takeaway Available', 'Breakfast', 'Lunch', 'Cash', 'Delivery', 'Outdoor Seating', 'Air Conditioned', 'Self Service', 'Indoor Seating', 'Digital Payments Accepted', 'Pure Veg', 'Desserts and Bakes']",
...,
"['Dinner', 'Delivery', 'Cash', 'Takeaway Available', 'Free Parking', 'Digital Payments Accepted', 'Pure Veg', 'Indoor Seating']",
"['Dinner', 'Cash', 'Takeaway Available', 'Lunch', 'Delivery', 'Free Parking', 'Indoor Seating', 'Air Conditioned', 'Outdoor Seating', 'Digital Payments Accepted', 'Catering Available', 'Pure Veg']",
"['Dinner', 'Cash', 'Takeaway Available', 'Debit Card', 'Delivery', 'Credit Card', 'Free Parking', 'Outdoor Seating']"],
dtype=object)
# Number of distinct highlight combinations.
zomato['highlights'].nunique()
31228
# Each value is a stringified list such as "['Lunch', 'Cash']". Drop the outer
# "['" / "']", split on the "', '" separator and flatten to one row per label.
# This replaces an apply() that was used only to extend a Python list.
highlight = (
    zomato["highlights"]
    .str[2:-2]
    .str.split("', '")
    .explode()
    .reset_index(drop=True)
)
# Number of distinct individual highlight labels.
highlight.nunique()
104
# Display the highlights column as stored in the frame.
zomato['highlights']
0 ['Lunch', 'Takeaway Available', 'Credit Card',...
1 ['Delivery', 'No Alcohol Available', 'Dinner',...
2 ['No Alcohol Available', 'Dinner', 'Takeaway A...
3 ['Takeaway Available', 'Credit Card', 'Lunch',...
4 ['Lunch', 'Serves Alcohol', 'Cash', 'Credit Ca...
...
211882 ['Cash', 'Takeaway Available', 'Delivery', 'In...
211925 ['Dinner', 'Cash', 'Debit Card', 'Lunch', 'Tak...
211926 ['Dinner', 'Cash', 'Credit Card', 'Lunch', 'Ta...
211940 ['Dinner', 'Cash', 'Takeaway Available', 'Debi...
211942 ['Dinner', 'Delivery', 'Credit Card', 'Lunch',...
Name: highlights, Length: 55568, dtype: object
# Distinct rating labels; the output shows several non-English variants.
zomato['rating_text'].unique()
array(['Very Good', 'Excellent', 'Good', 'Average', 'Not rated', 'Poor',
'Bardzo dobrze', 'Muito Bom', 'İyi', 'Çok iyi', 'Baik',
'Sangat Baik', 'Skvělé', 'Muy Bueno', 'Průměr', 'Ortalama',
'Skvělá volba', 'Muito bom', 'Velmi dobré', 'Excelente', 'Bom',
'Scarso', 'Promedio', 'Vynikajúce', 'Bueno', 'Dobré', 'Terbaik',
'Harika', 'Veľmi dobré', 'Eccellente', 'Buono', 'Média', 'Dobrze'],
dtype=object)
Map the non-English rating labels to their English equivalents using a dictionary.
# Non-English rating labels grouped by the English label they translate to.
_labels_by_english = {
    'Excellent': [
        'Excelente', 'Wybitnie', 'Ottimo', 'Skvělá volba', 'Harika',
        'Terbaik', 'Skvělé', 'Eccellente', 'Vynikajúce',
    ],
    'Very Good': [
        'Sangat Baik', 'Bardzo dobrze', 'Muito Bom', 'Velmi dobré',
        'Muy Bueno', 'Çok iyi', 'Muito bom', 'Veľmi dobré',
    ],
    'Good': [
        'Dobré', 'Baik', 'Bom', 'İyi', 'Bueno', 'Dobrze', 'Buono',
    ],
    'Average': [
        'Průměr', 'Ortalama', 'Média', 'Promedio', 'Średnio', 'Priemer',
        'Biasa', 'Media',
    ],
    'Poor': ['Scarso'],
}
# Flatten into the {foreign label: English label} lookup used for replacement.
rating_translation = {
    foreign: english
    for english, foreign_labels in _labels_by_english.items()
    for foreign in foreign_labels
}
# Swap every label found in rating_translation for its English equivalent;
# labels that are not keys of the mapping are left unchanged.
zomato['rating_text'] = zomato['rating_text'].replace(rating_translation)
# Check which labels remain after translation.
zomato['rating_text'].unique()
array(['Very Good', 'Excellent', 'Good', 'Average', 'Not rated', 'Poor'],
dtype=object)
The 'opentable_support' column is missing 12 values; replace the missing values with 0.
# Distinct opentable_support values before filling the gaps.
zomato['opentable_support'].unique()
array([ 0., nan])
# Fill the missing opentable_support entries with 0. The result is assigned
# back to the column: fillna(inplace=True) on a column selection is deprecated
# chained assignment and does not update the frame under copy-on-write.
zomato['opentable_support'] = zomato['opentable_support'].fillna(0)
# Check the distinct values after filling.
zomato['opentable_support'].unique()
array([0.])
# Distinct codes used in the delivery column.
zomato["delivery"].unique()
array([-1, 1, 0], dtype=int64)
# Distinct codes used in the takeaway column.
zomato["takeaway"].unique()
array([-1], dtype=int64)
# Preview the first five rows after the cleaning steps above.
zomato.head()
| res_id | name | establishment | url | address | city | city_id | locality | latitude | longitude | country_id | locality_verbose | cuisines | timings | average_cost_for_two | price_range | currency | highlights | aggregate_rating | rating_text | votes | photo_count | opentable_support | delivery | takeaway | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3400299 | Bikanervala | Quick Bites | https://www.zomato.com/agra/bikanervala-khanda... | Kalyani Point, Near Tulsi Cinema, Bypass Road,... | Agra | 34 | Khandari | 27.211450 | 78.002381 | 1 | Khandari, Agra | North Indian, South Indian, Mithai, Street Foo... | 8:30am – 10:30pm (Mon-Sun) | 700 | 2 | Rs. | ['Lunch', 'Takeaway Available', 'Credit Card',... | 4.4 | Very Good | 814 | 154 | 0.0 | -1 | -1 |
| 1 | 3400005 | Mama Chicken Mama Franky House | Quick Bites | https://www.zomato.com/agra/mama-chicken-mama-... | Main Market, Sadar Bazaar, Agra Cantt, Agra | Agra | 34 | Agra Cantt | 27.160569 | 78.011583 | 1 | Agra Cantt, Agra | North Indian, Mughlai, Rolls, Chinese, Fast Fo... | 12:30PM to 12Midnight (Mon, Wed, Thu, Fri, Sat... | 600 | 2 | Rs. | ['Delivery', 'No Alcohol Available', 'Dinner',... | 4.4 | Very Good | 1203 | 161 | 0.0 | -1 | -1 |
| 2 | 3401013 | Bhagat Halwai | Quick Bites | https://www.zomato.com/agra/bhagat-halwai-2-sh... | 62/1, Near Easy Day, West Shivaji Nagar, Goalp... | Agra | 34 | Shahganj | 27.182938 | 77.979684 | 1 | Shahganj, Agra | Fast Food, Mithai | 9:30 AM to 11 PM | 300 | 1 | Rs. | ['No Alcohol Available', 'Dinner', 'Takeaway A... | 4.2 | Very Good | 801 | 107 | 0.0 | 1 | -1 |
| 3 | 3400290 | Bhagat Halwai | Quick Bites | https://www.zomato.com/agra/bhagat-halwai-civi... | Near Anjana Cinema, Nehru Nagar, Civil Lines, ... | Agra | 34 | Civil Lines | 27.205668 | 78.004799 | 1 | Civil Lines, Agra | Desserts, Bakery, Fast Food, South Indian | 8am – 11pm (Mon-Sun) | 300 | 1 | Rs. | ['Takeaway Available', 'Credit Card', 'Lunch',... | 4.3 | Very Good | 693 | 157 | 0.0 | 1 | -1 |
| 4 | 3401744 | The Salt Cafe Kitchen & Bar | Casual Dining | https://www.zomato.com/agra/the-salt-cafe-kitc... | 1C,3rd Floor, Fatehabad Road, Tajganj, Agra | Agra | 34 | Tajganj | 27.157709 | 78.052421 | 1 | Tajganj, Agra | North Indian, Continental, Italian | 11:30 AM to 11:30 PM | 1000 | 3 | Rs. | ['Lunch', 'Serves Alcohol', 'Cash', 'Credit Ca... | 4.9 | Excellent | 470 | 291 | 0.0 | 1 | -1 |
# Dimensions after cleaning as (rows, columns).
zomato.shape
(55568, 25)
# Summary statistics for the numeric columns of the cleaned frame.
summary_stats = zomato.describe()
summary_stats
| res_id | city_id | latitude | longitude | country_id | average_cost_for_two | price_range | aggregate_rating | votes | photo_count | opentable_support | delivery | takeaway | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 5.556800e+04 | 55568.000000 | 55568.000000 | 55568.000000 | 55568.0 | 55568.000000 | 55568.000000 | 55568.000000 | 55568.000000 | 55568.000000 | 55568.0 | 55568.000000 | 55568.0 |
| mean | 1.313694e+07 | 3409.499298 | 21.450847 | 76.497131 | 1.0 | 528.205874 | 1.714728 | 2.958593 | 223.330352 | 160.974770 | 0.0 | -0.349824 | -1.0 |
| std | 8.105959e+06 | 5174.942737 | 42.901135 | 10.982976 | 0.0 | 595.028447 | 0.878227 | 1.464576 | 618.224019 | 586.975382 | 0.0 | 0.933794 | 0.0 |
| min | 5.000000e+01 | 1.000000 | 0.000000 | 0.000000 | 1.0 | 0.000000 | 1.000000 | 0.000000 | -18.000000 | 0.000000 | 0.0 | -1.000000 | -1.0 |
| 25% | 3.001352e+06 | 8.000000 | 16.518374 | 74.645885 | 1.0 | 200.000000 | 1.000000 | 2.900000 | 6.000000 | 1.000000 | 0.0 | -1.000000 | -1.0 |
| 50% | 1.869268e+07 | 26.000000 | 22.468629 | 77.106348 | 1.0 | 350.000000 | 1.000000 | 3.500000 | 35.000000 | 10.000000 | 0.0 | -1.000000 | -1.0 |
| 75% | 1.887262e+07 | 11294.000000 | 26.752959 | 79.831641 | 1.0 | 600.000000 | 2.000000 | 3.900000 | 175.000000 | 69.000000 | 0.0 | 1.000000 | -1.0 |
| max | 1.915979e+07 | 11354.000000 | 10000.000000 | 91.832769 | 1.0 | 30000.000000 | 4.000000 | 4.900000 | 42539.000000 | 17702.000000 | 0.0 | 1.000000 | -1.0 |
# Summary (count, unique, top, freq) for the object and categorical columns.
full_summary_stats_categorical = zomato.describe(include=[object, 'category'])
full_summary_stats_categorical
| name | establishment | url | address | city | locality | locality_verbose | cuisines | timings | currency | highlights | rating_text | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 55568 | 55568 | 55568 | 55568 | 55568 | 55568 | 55568 | 55568 | 54565 | 55568 | 55568 | 55568 |
| unique | 41100 | 27 | 55568 | 50658 | 99 | 3731 | 3910 | 9383 | 7740 | 1 | 31228 | 6 |
| top | Domino's Pizza | Quick Bites | https://www.zomato.com/agra/bikanervala-khanda... | Laxman Jhula, Tapovan, Rishikesh | Bangalore | Civil Lines | Gomti Nagar, Lucknow | North Indian | 11 AM to 11 PM | Rs. | ['Dinner', 'Takeaway Available', 'Lunch', 'Cas... | Average |
| freq | 399 | 14032 | 1 | 37 | 2247 | 752 | 274 | 4295 | 7012 | 55568 | 860 | 16323 |
# Histogram of aggregate ratings using 20 bins.
zomato['aggregate_rating'].hist(bins=20, edgecolor='black')
plt.title('Ratings Distribution')
plt.xlabel('Ratings')
plt.ylabel('Number of Restaurants')
plt.show()
# Kernel density estimate of the same rating column.
sns.kdeplot(zomato['aggregate_rating'], fill=True)
plt.title("Ratings distribution")
plt.show()
# Histogram of the average cost for two using 50 bins.
plt.figure(figsize=(10, 10))
plt.hist(zomato['average_cost_for_two'], bins=50, edgecolor='black', alpha=0.7)
plt.title('Distribution of Average Cost for Two')
plt.xlabel('Average Cost for Two')
plt.ylabel('Frequency')
# Only the 0-5000 range is shown; values above 5000 fall outside the axis.
plt.xlim(0, 5000)
plt.show()
# Kernel density estimate of the same column, shown over the 0-6000 range.
sns.kdeplot(zomato['average_cost_for_two'], fill=True)
plt.xlim([0, 6000])
plt.xticks(range(0,6000,500))
plt.title("distribution of Average cost for two ")
plt.show()
# Count restaurants per price range. Sorting by the price-range value makes
# the positions used for the text labels match the order in which seaborn
# draws bars for a numeric categorical axis (ascending), rather than relying
# on the count order happening to coincide with it.
price_counts = zomato['price_range'].value_counts().sort_index()
sns.barplot(x=price_counts.index, y=price_counts.values)
# Write each count at the top of its bar.
for position, count in enumerate(price_counts.values):
    plt.text(position, count, str(count), color='black', ha="center")
plt.title('Distribution of Price Range')
plt.xlabel('Price Range')
plt.ylabel('Number of Restaurants')
plt.show()
# Share of restaurants in each price range, labelled with one-decimal percentages.
zomato['price_range'].value_counts().plot(kind='pie', autopct='%1.1f%%')
plt.title('Pie Chart of Price Range Distribution')
plt.ylabel('') # Hide y-label as it's redundant in pie charts
plt.show()
# Pairwise correlation between the rating, cost and engagement columns.
numeric_features = [
    'aggregate_rating',
    'average_cost_for_two',
    'price_range',
    'votes',
    'photo_count',
]
correlation_matrix = zomato[numeric_features].corr()
# Annotated heatmap of the correlation coefficients (two decimals per cell).
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=.5,
)
plt.title('Correlation Matrix')
plt.show()
# Distinct cuisine strings after missing values were filled.
zomato['cuisines'].unique()
array(['North Indian, South Indian, Mithai, Street Food, Desserts',
'North Indian, Mughlai, Rolls, Chinese, Fast Food, Street Food',
'Fast Food, Mithai', ...,
'Street Food, Biryani, Chinese, Fast Food, North Indian, Mughlai',
'North Indian, Chinese, Mexican, Italian, Thai, Continental',
'North Indian, Lucknowi, Chinese'], dtype=object)
# Split each comma-separated cuisine string and flatten to one row per cuisine.
cuisines_expanded = zomato['cuisines'].str.split(', ').explode()
# Count how many restaurants list each individual cuisine.
cuisine_counts = cuisines_expanded.value_counts()
# value_counts sorts descending, so head(10) is the ten most common cuisines.
top_cuisines = cuisine_counts.head(10)
plt.figure(figsize=(12, 8))
sns.barplot(x=top_cuisines.values, y=top_cuisines.index)
plt.title('Top 10 Cuisines by Number of Restaurants')
plt.xlabel('Number of Restaurants')
plt.ylabel('Cuisines')
plt.show()
# Number of restaurants per establishment label, most frequent first.
establishment_counts = zomato['establishment'].value_counts()
# Plotting
plt.figure(figsize=(10, 6))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed seaborn version.
sns.barplot(x=establishment_counts.values, y=establishment_counts.index, palette='pastel')
plt.title('Distribution of Establishment')
plt.xlabel('Number of Establishments')
plt.ylabel('Types of Establishments')
plt.show()
# Number of rows (outlets) per restaurant name, most frequent first.
zomato['name'].value_counts()
name
Domino's Pizza 399
Cafe Coffee Day 315
KFC 204
Baskin Robbins 202
Keventers 189
...
Boulevard 69 1
7-Eleven Restaurant 1
Food station 1
Silver Saloon - Taj Usha Kiran Palace 1
Geeta lodge 1
Name: count, Length: 41100, dtype: int64
Some of the places are chains with many outlets. Let's separate the chains from the single restaurants.
# Outlet count per restaurant name; used below to separate chains from singles.
places = zomato['name'].value_counts()
places
name
Domino's Pizza 399
Cafe Coffee Day 315
KFC 204
Baskin Robbins 202
Keventers 189
...
Boulevard 69 1
7-Eleven Restaurant 1
Food station 1
Silver Saloon - Taj Usha Kiran Palace 1
Geeta lodge 1
Name: count, Length: 41100, dtype: int64
# A name that appears at least twice is treated as a chain; a name that
# appears exactly once is a single restaurant.
chains = places[places >= 2]
single = places[places == 1]
total_restaurants = zomato.shape[0]
# Every row that is not a single restaurant is an outlet of some chain.
chain_outlets = total_restaurants - single.shape[0]
print("Total Number of Restaurants: ", total_restaurants)
print("Number of single places: ", single.count())
# Report distinct chain names separately from the outlets that belong to them;
# previously the outlet count was printed under the "Number of chains" label.
print("Number of chains : ", chains.count())
print("Number of chain outlets : ", chain_outlets)
# Scale to a percentage before rounding so the printed value carries no
# floating-point residue from multiplying an already-rounded ratio.
print("Percentage of Restaurants(chains) : ", np.round(chain_outlets / total_restaurants * 100), '%')
Total Number of Restaurants: 55568 Number of single places: 36210 Number of chains : 19358 Percentage of Restaurants(chains) : 35.0 %
# Ten most frequent restaurant names, re-sorted ascending so the largest chain
# ends up at the top of the horizontal bar chart.
top10_chains = zomato["name"].value_counts()[:10].sort_values(ascending=True)
plt.figure(figsize=[11,7])
bars = plt.barh(top10_chains.index, top10_chains.values)
# Write the outlet count just past the end of each bar.
for bar in bars:
    plt.text(bar.get_width() + 3, bar.get_y() + bar.get_height()/2, f'{bar.get_width()}',
             va='center', color='black')
plt.xlabel("No. of outlets")
plt.title("Top 10 Restaurant Chains by number of outlets")
# Show the plot
plt.show()
# Names with more than four outlets, limited to the ten with the most outlets.
# The counts are computed once and reused for the filter.
name_counts = zomato["name"].value_counts()
top10_chains = name_counts[name_counts > 4].index[:10]
# Mean rating per selected chain, ascending so the best-rated bar is on top.
top10_ratings = zomato[zomato["name"].isin(top10_chains)].groupby("name")["aggregate_rating"].mean().sort_values()
plt.figure(figsize=[11,7])
bars = plt.barh(top10_ratings.index, top10_ratings.values)
# Adding ratings to the bars
for i, v in enumerate(top10_ratings.values):
    plt.text(v + 0.01, i, f'{v:.2f}', color='black')
# Labeling and title
plt.xlabel("Average Rating")
plt.ylabel("Restaurant Chain")
plt.title("Top 10 Restaurant Chains by Average Rating")
# Show the plot
plt.show()
# Restaurants per city for the ten largest cities, ascending for plotting.
# Selecting res_id before counting avoids counting every column of the frame.
city_counts = zomato.groupby("city")["res_id"].count().nlargest(10).sort_values()
plt.figure(figsize=[11,7])
bars = plt.barh(city_counts.index, city_counts.values)
# Write the restaurant count just past the end of each bar.
for bar in bars:
    plt.text(bar.get_width() + 20, bar.get_y() + bar.get_height()/2, f'{bar.get_width()}',
             va='center', color='black')
plt.xlabel("No. of Restaurants")
plt.title("Top 10 Cities by No. of Restaurants")
plt.show()
# Restaurants per establishment label for the ten most common labels.
# Selecting res_id before counting avoids counting every column of the frame.
est_count = zomato.groupby("establishment")["res_id"].count().nlargest(10)
sns.barplot(y=est_count.index, x=est_count.values)
plt.xlabel("No. of Restaurants")
plt.title("Number of Restaurants (by Establishment)")
plt.show()
# Preview the cuisines column (one comma-separated string per restaurant).
zomato.loc[:, 'cuisines']
0 North Indian, South Indian, Mithai, Street Foo...
1 North Indian, Mughlai, Rolls, Chinese, Fast Fo...
2 Fast Food, Mithai
3 Desserts, Bakery, Fast Food, South Indian
4 North Indian, Continental, Italian
...
211882 Ice Cream
211925 Gujarati, North Indian, Chinese
211926 Gujarati, Street Food
211940 Fast Food
211942 Fast Food, Sandwich, Salad
Name: cuisines, Length: 55568, dtype: object
# Build a per-(city, cuisine) summary and keep the five best-rated cuisines
# of every city.
#
# `cuisines` stores one comma-separated string per restaurant, and
# DataFrame.explode only expands list-like cells. The strings are therefore
# split into lists first; without that step explode is a no-op and each
# whole combination (e.g. "North Indian, South Indian") is grouped as if it
# were a single cuisine.
df_exploded = zomato.assign(
    cuisines=zomato['cuisines'].str.split(',')
).explode('cuisines')
# Drop the blank left after each comma so "Chinese" and " Chinese" group together.
df_exploded['cuisines'] = df_exploded['cuisines'].str.strip()

# Coerce to numbers so the means below cannot fail on stray non-numeric
# values; anything unparseable becomes NaN and is ignored by mean().
df_exploded['aggregate_rating'] = pd.to_numeric(df_exploded['aggregate_rating'], errors='coerce')
df_exploded['average_cost_for_two'] = pd.to_numeric(df_exploded['average_cost_for_two'], errors='coerce')

# One row per (city, cuisine): mean rating, mean cost for two, and the number
# of restaurant rows offering that cuisine in that city.
competitors = df_exploded.groupby(['city', 'cuisines']).agg({
    'aggregate_rating': 'mean',
    'average_cost_for_two': 'mean',
    'name': 'size'
}).rename(columns={'name': 'restaurant_count'}).reset_index()

# Within each city rank by rating (ties broken by restaurant count), both
# descending, then keep the first five rows per city.
top_competitors = competitors.sort_values(
    by=['city', 'aggregate_rating', 'restaurant_count'],
    ascending=[True, False, False],
)
top_competitors = top_competitors.groupby('city').head(5)
# Draw one chart per city: its top cuisines by mean rating, with the bars
# coloured by mean cost for two.
for city, sub_df in top_competitors.groupby('city'):
    plt.figure(figsize=(10, 6))
    ax = sns.barplot(
        x='aggregate_rating',
        y='cuisines',
        hue='average_cost_for_two',
        data=sub_df,
    )
    ax.set_title(f'Top Competitors in {city}')
    ax.set_xlabel('average Rating')
    ax.set_ylabel('cuisine')
    ax.legend(title='average Cost for Two')
    plt.show()
# Strengths: top-ranked city/cuisine rows rated at least 4 whose restaurant
# count is above the median count among the top-ranked rows.
rated_well = top_competitors['aggregate_rating'] >= 4
above_median_count = (
    top_competitors['restaurant_count'] > top_competitors['restaurant_count'].median()
)
strengths = top_competitors[rated_well & above_median_count]

# Weaknesses (lower ratings, fewer counts, high costs), taken from the full
# competitors table rather than only the top-ranked rows.
# NOTE(review): the printed result contains rows with a rating of 0.0 --
# possibly unrated restaurants rather than genuinely poor ones; confirm.
rated_poorly = competitors['aggregate_rating'] < 3.5
at_most_median_count = (
    competitors['restaurant_count'] <= competitors['restaurant_count'].median()
)
costlier_than_median = (
    competitors['average_cost_for_two'] > competitors['average_cost_for_two'].median()
)
weaknesses = competitors[rated_poorly & at_most_median_count & costlier_than_median]

# Display strengths and weaknesses
print("Identified Strengths:")
print(strengths)
print("\nIdentified Weaknesses:")
print(weaknesses)
Identified Strengths:
city cuisines \
1077 Amravati North Indian, South Indian
2804 Bhopal Street Food, South Indian, Fast Food, Desserts...
2491 Bhopal Bakery, Chinese, Continental, Pizza, Italian, ...
3940 Chennai North Indian, Mediterranean, Asian, Arabian, BBQ
3463 Chennai BBQ, North Indian, European, Beverages
3855 Chennai Mexican, American, Tex-Mex, Burger
5850 Greater Noida Burger, Fast Food, Beverages
6423 Guwahati Cafe, Italian
6759 Gwalior North Indian, Biryani, Chinese
7214 Hyderabad European, Mediterranean, North Indian
7020 Hyderabad American
8582 Jammu Desserts, Fast Food, Chinese, North Indian, Mi...
9470 Junagadh Gujarati
9475 Junagadh Ice Cream, Desserts
9594 Kanpur Fast Food, Italian
10266 Kolhapur Ice Cream, Desserts
11287 Lucknow Modern Indian
11757 Madurai Burger, Fast Food
11778 Madurai Chinese, South Indian, North Indian
11748 Madurai Biryani, Arabian, Chinese
12106 Mangalore Ice Cream, Desserts, Beverages, Fast Food
13904 Nagpur Beverages, Desserts
15092 New Delhi Italian, Finger Food
15541 Noida North Indian, Mughlai, Biryani, Rolls
17779 Rajkot Sandwich, Burmese, Chinese
17661 Rajkot Burger, Fast Food
aggregate_rating average_cost_for_two restaurant_count
1077 4.100000 500.000000 2
2804 4.650000 400.000000 2
2491 4.500000 500.000000 3
3940 4.900000 1400.000000 3
3463 4.820000 1400.000000 5
3855 4.800000 1400.000000 2
5850 4.050000 450.000000 2
6423 4.600000 800.000000 2
6759 4.500000 400.000000 2
7214 4.850000 1500.000000 4
7020 4.800000 1600.000000 2
8582 4.300000 300.000000 2
9470 4.150000 225.000000 2
9475 4.033333 200.000000 3
9594 4.550000 250.000000 2
10266 4.200000 275.000000 2
11287 4.750000 1300.000000 2
11757 4.450000 450.000000 2
11778 4.200000 200.000000 2
11748 4.150000 350.000000 2
12106 4.666667 283.333333 3
13904 4.650000 375.000000 2
15092 4.900000 2000.000000 2
15541 4.633333 516.666667 3
17779 4.300000 200.000000 2
17661 4.200000 400.000000 2
Identified Weaknesses:
city cuisines \
1 Agra Asian, North Indian, Chinese
2 Agra Asian, North Indian, Chinese, South Indian, It...
3 Agra Asian, South Indian, Continental, Chinese, Nor...
6 Agra Bakery, Beverages, Cafe, Coffee, Fast Food
14 Agra Bar Food
... ... ...
21295 Zirakpur North Indian, Chinese, Continental, Biryani
21296 Zirakpur North Indian, Chinese, Continental, Mexican
21305 Zirakpur North Indian, Continental, Chinese
21306 Zirakpur North Indian, Continental, Finger Food
21309 Zirakpur North Indian, Mughlai
aggregate_rating average_cost_for_two restaurant_count
1 0.0 1200.0 1
2 0.0 1200.0 1
3 0.0 2000.0 1
6 3.3 500.0 1
14 0.0 1500.0 1
... ... ... ...
21295 3.2 1500.0 1
21296 3.4 1600.0 1
21305 2.7 600.0 1
21306 3.2 1200.0 1
21309 2.4 650.0 1
[1956 rows x 5 columns]